Analysis

Count data

Data wrangling

EA sampling sites

# WD
setwd("~/PhD/London NERC DTP documents/EA data/EnvironmentAgency_EcologyFish_DataDownload_2025-04-02")

# Load data
counts <- read_csv("TraC_Fish_Counts_2025-04-02.csv")
sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")

# Join count data with site information
counts_clean <- counts %>% left_join(sites, by = "SITE_ID") %>%
  mutate(EVENT_DATE = ymd(EVENT_DATE),
         EVENT_YEAR = year(EVENT_DATE),
         EVENT_MONTH = month(EVENT_DATE),
         SEASON = if_else(EVENT_MONTH %in% 4:9, "Summer", "Winter"))

counts_clean <- counts_clean %>%
  select(
    SITE_ID,
    SITE_NAME = SITE_NAME.x,
    SITE_PARENT_NAME,
    EVENT_DATE,
    SPECIES_NAME,
    LATIN_NAME,
    FISH_COUNT,
    TOP_TIER_SITE,
    SITE_RANKED_NGR,
    SITE_RANKED_EASTING,
    SITE_RANKED_NORTHING)

# Convert dates and extract components
counts_clean <- counts_clean %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_DATE_YEAR = year(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_DAY = day(EVENT_DATE))

# Sampling method
counts_clean <- counts_clean %>%
  mutate(
    METHOD = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Thames only
counts_thames <- counts_clean %>%
  filter(str_detect(TOP_TIER_SITE, regex("thames", ignore_case = TRUE)))

# Lat and long
sites_sf <- st_as_sf(counts_thames, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326)
coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]

# Gears
sites_sf <- sites_sf %>%
  mutate(
    GEAR_TYPE = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Map
gear_types <- unique(sites_sf$GEAR_TYPE)
gear_pal <- colorFactor(palette = "Set2", domain = gear_types)

leaflet(sites_sf) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~gear_pal(GEAR_TYPE),
    radius = 6,
    stroke = TRUE, weight = 1, fillOpacity = 0.9,
    popup = ~paste0(
      "<b>Site:</b> ", SITE_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b>Gear:</b> ", GEAR_TYPE), group = ~GEAR_TYPE) %>%
  addLegend(
    position = "bottomright",
    pal = gear_pal,
    values = ~GEAR_TYPE,
    title = "Gear Type",
    opacity = 1) %>%
  addLayersControl(
    overlayGroups = gear_types,
    options = layersControlOptions(collapsed = FALSE),
    position = "topright")
# RA
gear_totals <- counts_thames %>%
  filter(!is.na(METHOD)) %>%
  group_by(METHOD) %>%
  summarise(
    Total_Individuals = sum(FISH_COUNT, na.rm = TRUE),
    Total_Species = n_distinct(SPECIES_NAME),
    .groups = "drop")

# Get grand total
grand_total <- sum(gear_totals$Total_Individuals)

# Add relative abundance column
gear_totals <- gear_totals %>%
  mutate(
    Relative_Abundance = round(100 * Total_Individuals / grand_total, 1)
  ) %>%
  arrange(desc(Relative_Abundance)) %>%
  mutate(METHOD = factor(METHOD, levels = unique(METHOD)))

ggplot(gear_totals, aes(x = reorder(METHOD, -Relative_Abundance), y = Relative_Abundance)) +
  geom_col(fill = "tomato") +
  labs(
    title = "Relative abundance by sampling method",
    x = "Sampling method",
    y = "Relative abundance (%)") +
  theme_minimal()

# Matrix
gear_species_table <- counts_thames %>%
  filter(!is.na(METHOD), !is.na(SPECIES_NAME)) %>%
  count(METHOD, SPECIES_NAME) %>%
  pivot_wider(names_from = METHOD, values_from = n, values_fill = 0)

gear_pa <- gear_species_table %>%
  mutate(across(-SPECIES_NAME, ~ as.integer(. > 0)))

gear_cols <- names(gear_pa)[!names(gear_pa) %in% "SPECIES_NAME"]

gear_unique <- gear_pa %>%
  rowwise() %>%
  mutate(unique_to = if (sum(c_across(all_of(gear_cols))) == 1) {
    gear_cols[which(c_across(all_of(gear_cols)) == 1)]
  } else {
    NA_character_
  }) %>%
  ungroup() %>%
  filter(!is.na(unique_to))

unique_counts <- gear_unique %>%
  count(unique_to, name = "unique_species_count") %>%
  arrange(desc(unique_species_count))

ggplot(unique_counts, aes(x = reorder(unique_to, unique_species_count), y = unique_species_count, fill = unique_to)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Number of species unique to each gear type",
    x = "Sampling method", y = "Unique species count") +
  theme_minimal()

gear_pa_long <- gear_pa %>%
  pivot_longer(cols = -SPECIES_NAME, names_to = "METHOD", values_to = "present")

ggplot(gear_pa_long, aes(x = METHOD, y = SPECIES_NAME, fill = as.factor(present))) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("0" = "white", "1" = "steelblue")) +
  labs(
    title = "Species presence across sampling method",
    x = "Sampling method", y = "Species",
    fill = "Present") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6))

Focusing on seine net data only

# Seine net only
counts_thames_filtered <- counts_thames %>%
  filter(METHOD == "Seine Net")

How is fish alpha diversity changing over time across the estuary?

# Estuary
diversity_summary <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
    Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape
diversity_long <- diversity_summary %>%
  pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Metric", values_to = "Value")

# Raw trends over time
ggplot(diversity_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(color = "red", linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, color = "black", linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Diversity metrics over time - Thames Estuary (seine net)",
    subtitle = "LOESS smoothed trends across all years",
    x = "Year", y = "Diversity metric") +
  theme_minimal()

# GLMs/LMMs for each metric
glm_richness   <- glm(Species_Richness ~ EVENT_DATE_YEAR, data = diversity_summary, family = poisson())
lm_abundance   <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = diversity_summary)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
results <- bind_rows(
  summary_table(glm_richness,   "Species Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(lm_shannon,     "Shannon Diversity"),
  summary_table(lm_simpson,     "Simpson Diversity"),
  summary_table(lm_margalef,    "Margalef Richness"),
  summary_table(lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in diversity metrics over time - Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

How is fish alpha diversity changing over time across within each zone?

# Zone
zone_summary <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, SPECIES_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Wide community matrix for each zone-year
community_matrix <- zone_summary %>%
  pivot_wider(names_from = SPECIES_NAME, values_from = FISH_COUNT, values_fill = 0)

# Diversity metrics
diversity_zone_year <- community_matrix %>%
  rowwise() %>%
  mutate(
    Species_Richness = sum(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)) > 0),
    Total_Abundance = sum(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE))),
    Shannon_Diversity = diversity(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)), index = "shannon"),
    Simpson_Diversity = diversity(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)), index = "simpson")) %>%
  ungroup() %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance + 1),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness + 1),
    EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

# Reshape
diversity_long_zone <- diversity_zone_year %>%
  pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
                        Margalef_Richness, Pielou_Evenness, Total_Abundance),
               names_to = "Metric", values_to = "Value")

# Model
zone_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Value ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

zone_results <- diversity_long_zone %>%
  group_by(TOP_TIER_SITE, Metric) %>%
  group_modify(~ zone_model_summary(.x)) %>%
  ungroup() 

# Plot
ggplot(zone_results %>% filter(!is.na(Slope)),
       aes(x = Slope, y = TOP_TIER_SITE, color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 3) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3, color = "gray40") +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in diversity metrics over time by zone - Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | Linear model per zone & metric",
    x = "Slope (Change per year)",
    y = "Zone",
    color = "Significance",
    caption = "* p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal() +
  theme(strip.text = element_text(face = "bold"))

Where are the hotspots of ecological change?

# Site
diversity_site_year <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Species_Richness = n_distinct(SPECIES_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
    Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
    EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))

# Reshape
diversity_long_site <- diversity_site_year %>%
  pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
                        Margalef_Richness, Pielou_Evenness, Total_Abundance),
               names_to = "Metric", values_to = "Value")

# Model
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply per group
site_results <- diversity_long_site %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

site_results_clean <- site_results %>%
  filter(!is.na(Slope))

# Plot
ggplot(site_results_clean, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in diversity metrics over time by site - Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Site ranking
site_ranked_sig <- site_results_clean %>%
  filter(Significance != "") %>%  # Keep only significant trends
  mutate(
    Direction = ifelse(Slope > 0, "Increasing", "Decreasing"),
    Strength = abs(Slope)) %>%
  group_by(Metric) %>%
  arrange(desc(P_value)) %>%
  mutate(Rank = row_number()) %>%
  ungroup()

site_ranked_sig %>%
  arrange(Metric, Rank) %>%
  select(Metric, SITE_PARENT_NAME, Slope, Direction, P_value, Significance) %>%
  gt(groupname_col = "Metric") %>%
  fmt_number(columns = c(Slope, P_value), decimals = 3) %>%
  tab_header(
    title = "Sites with significant trends per diversity metric (seine net)",
    subtitle = "Ranked by magnitude of change") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    Slope = "Slope",
    Direction = "Trend",
    P_value = "p-value",
    Significance = "Significance")
Sites with significant trends per diversity metric (seine net)
Ranked by magnitude of change
Site Slope Trend p-value Significance
Pielou_Evenness
Richmond −0.001 Decreasing 0.029 *
Greenwich −0.001 Decreasing 0.022 *
Shannon_Diversity
Mucking −0.058 Decreasing 0.002 **
Simpson_Diversity
West Thurrock −0.002 Decreasing 0.036 *
Species_Richness
Greenhithe 3.000 Increasing 0.000 ***
Total_Abundance
Greenwich −6.276 Decreasing 0.038 *
Richmond 23.970 Increasing 0.022 *

Which species are changing in abundance over time?

# Estuary
species_trends <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Fit linear models for each species
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply model per species
species_results <- species_trends %>%
  group_by(SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

# Filter significant results
species_results_clean <- species_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_results_clean, aes(x = Slope, y = reorder(SPECIES_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  labs(
    title = "Species-level trends in abundance over time in the Thames Estuary (seine net)",
    subtitle = "Slope ± 95% CI | Linear model per species",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

#Zone
species_zone_trends <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

#Model
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

species_zone_results <- species_zone_trends %>%
  group_by(TOP_TIER_SITE, SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

species_zone_results_clean <- species_zone_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_zone_results_clean, aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_grid(~ TOP_TIER_SITE, scales = "free_y") +
  labs(
    title = "Species-level abundance trends by estuary zone (seine net)",
    subtitle = "Slopes from per-zone linear models ± 95% CI",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    color = "Significance",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal(base_size = 12) +
  theme(strip.text = element_text(face = "bold", size = 10))

# Site
species_site_trends <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME) %>%
  summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Model
species_model_summary <- function(df) {
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
    return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
  }
  model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
  s <- summary(model)
  slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
  se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

species_site_results <- species_site_trends %>%
  group_by(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_modify(~ species_model_summary(.x)) %>%
  ungroup()

species_site_results_clean <- species_site_results %>%
  filter(!is.na(Slope) & Significance != "")

# Plot
ggplot(species_site_results_clean,
       aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", nrow = 5) +
  labs(
    title = "Species-level abundance trends by site (seine net)",
    subtitle = "Slopes from per-site linear models ± 95% CI",
    x = "Slope (Change in abundance per year)",
    y = "Species",
    color = "Significance",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal(base_size = 12) +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 6))

What are the patterns in temporal beta diversity?

# Filter
fish_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT))

# Presence/absence
fish_clean <- fish_clean %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

# Summarise
year_species_pa <- fish_clean %>%
  group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
  summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
  pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0) %>%
  column_to_rownames("EVENT_DATE_YEAR")

# Matrix
year_species_pa <- as.data.frame(year_species_pa)

# betapart core object
beta_core <- betapart.core(year_species_pa)

# Sørensen-based beta diversity components
beta_res <- beta.pair(beta_core, index.family = "sor")

# Extract matrices
sor_mat <- as.matrix(beta_res$beta.sor)
sim_mat <- as.matrix(beta_res$beta.sim)
sne_mat <- as.matrix(beta_res$beta.sne)

# Extract years in chronological order
years <- sort(as.numeric(rownames(sor_mat)))

# Create a data frame
beta_trends <- tibble(
  Year1 = years[-length(years)],
  Year2 = years[-1],
  beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
  beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
  beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)]))

# Linear models by component
lm_sor <- lm(beta_sor ~ Year2, data = beta_trends)
lm_sim <- lm(beta_sim ~ Year2, data = beta_trends)
lm_sne <- lm(beta_sne ~ Year2, data = beta_trends)

# Extract R² and p-values
get_lm_stats <- function(model) {
  summary_model <- summary(model)
  r2 <- round(summary_model$r.squared, 3)
  p <- round(summary_model$coefficients[2, 4], 3)
  return(list(r2 = r2, p = p))
}

sor_stats <- get_lm_stats(lm_sor)
sim_stats <- get_lm_stats(lm_sim)
sne_stats <- get_lm_stats(lm_sne)

beta_trends_long <- beta_trends %>%
  pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
               names_to = "Component",
               values_to = "Dissimilarity")

beta_trends_long <- beta_trends_long %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))


# R² and p-values
subtitle_text <- paste0(
  "Sorensen: R² = ", sor_stats$r2, ", p = ", sor_stats$p, "   |   ",
  "Turnover: R² = ", sim_stats$r2, ", p = ", sim_stats$p, "   |   ",
  "Nestedness: R² = ", sne_stats$r2, ", p = ", sne_stats$p)

# Plot
ggplot(beta_trends_long, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Sorensen" = "darkorchid", 
                                "Turnover" = "steelblue", 
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year change in beta diversity components in the Thames Estuary (seine net)",
    subtitle = subtitle_text,
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal()

# Zone
fish_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

zone_list <- unique(fish_clean$TOP_TIER_SITE)
all_beta_trends <- list()

# Model
for (zone in zone_list) {
  cat("Processing zone:", zone, "\n")
  
  zone_data <- fish_clean %>%
    filter(TOP_TIER_SITE == zone)
  
  zone_pa <- zone_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  if (nrow(zone_pa) >= 3) {
    zone_matrix <- zone_pa %>%
      column_to_rownames("EVENT_DATE_YEAR") %>%
      as.data.frame()
    
    if (!all(rowSums(zone_matrix) == 0)) {
      beta_core <- betapart.core(zone_matrix)
      beta_res <- beta.pair(beta_core, index.family = "sor")
      
      sor_mat <- as.matrix(beta_res$beta.sor)
      sim_mat <- as.matrix(beta_res$beta.sim)
      sne_mat <- as.matrix(beta_res$beta.sne)
      years <- sort(as.numeric(rownames(sor_mat)))
      
      # Get year-to-year dissimilarities
      beta_trends <- tibble(
        Year1 = years[-length(years)],
        Year2 = years[-1],
        beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
        beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
        beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
      ) %>%
        pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
                     names_to = "Component", values_to = "Dissimilarity") %>%
        mutate(TOP_TIER_SITE = zone)
      
      all_beta_trends[[zone]] <- beta_trends
    }
  }
}

# Combine results
beta_trends_all <- bind_rows(all_beta_trends)

# Clean component labels
beta_trends_all <- beta_trends_all %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))

# R² and p-values
lm_stats_list <- beta_trends_all %>%
  group_by(TOP_TIER_SITE, Component) %>%
  summarise(
    lm_model = list(lm(Dissimilarity ~ Year2)),
    .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))) %>%
  select(TOP_TIER_SITE, Component, r2, p)


facet_grid(rows = vars(TOP_TIER_SITE))

beta_trends_all <- beta_trends_all %>%
  mutate(
    TOP_TIER_ZONE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower")),
  )

# Subtitle
zone_order <- c("Thames Upper", "Thames Middle", "Thames Lower")

subtitle_text <- lm_stats_list %>%
  mutate(
    label = paste0(
      "<span style='color:",
      case_when(Component == "Sorensen" ~ "darkorchid",
                Component == "Turnover" ~ "steelblue",
                Component == "Nestedness" ~ "darkorange"),
      "'>", Component, "</span>: R² = ", r2, ", p = ", p)) %>%
  group_by(TOP_TIER_SITE) %>%
  summarise(text = paste(label, collapse = " &nbsp;&nbsp;|&nbsp;&nbsp; "), .groups = "drop") %>%
  mutate(TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = zone_order)) %>%
  arrange(TOP_TIER_SITE) %>%
  summarise(subtitle = paste(text, collapse = "<br><br>")) %>%
  pull(subtitle)

# Plot
ggplot(beta_trends_all, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 1.5) +
  facet_grid(rows = vars(TOP_TIER_ZONE), scales = "free_y") +
  scale_color_manual(values = c("Sorensen" = "darkorchid",
                                "Turnover" = "steelblue",
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year beta diversity trends by zone (seine net)",
    subtitle = subtitle_text,
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal() +
  theme(
    plot.subtitle = ggtext::element_markdown(size = 10),
    strip.text = element_text(face = "bold", size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1))

# Site
fish_clean <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
  mutate(PA = as.numeric(FISH_COUNT > 0))

site_list <- unique(fish_clean$SITE_PARENT_NAME)

site_beta_trends <- list()

# Model
for (site in site_list) {
  cat("Processing site:", site, "\n")
  
  site_data <- fish_clean %>%
    filter(SITE_PARENT_NAME == site)
  
  site_pa <- site_data %>%
    group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
    summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
    pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
  
  if (nrow(site_pa) >= 3) {
    site_matrix <- site_pa %>%
      column_to_rownames("EVENT_DATE_YEAR") %>%
      as.data.frame()
    
    if (!all(rowSums(site_matrix) == 0)) {
      beta_core <- betapart.core(site_matrix)
      beta_res <- beta.pair(beta_core, index.family = "sor")
      
      sor_mat <- as.matrix(beta_res$beta.sor)
      sim_mat <- as.matrix(beta_res$beta.sim)
      sne_mat <- as.matrix(beta_res$beta.sne)
      years <- sort(as.numeric(rownames(sor_mat)))
      
      beta_trends <- tibble(
        Year1 = years[-length(years)],
        Year2 = years[-1],
        beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
        beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
        beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
      ) %>%
        pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
                     names_to = "Component", values_to = "Dissimilarity") %>%
        mutate(SITE_PARENT_NAME = site)
      
      site_beta_trends[[site]] <- beta_trends
    }
  }
}

beta_trends_site <- bind_rows(site_beta_trends) %>%
  mutate(Component = recode(Component,
                            beta_sor = "Sorensen",
                            beta_sim = "Turnover",
                            beta_sne = "Nestedness"))
# R² and p-values
lm_stats_list <- beta_trends_site %>%
  group_by(SITE_PARENT_NAME, Component) %>%
  summarise(lm_model = list(lm(Dissimilarity ~ Year2)), .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))
  ) %>%
  select(SITE_PARENT_NAME, Component, r2, p)

ggplot(beta_trends_site, aes(x = Year2, y = Dissimilarity, color = Component)) +
  geom_line(size = 1) +
  geom_point(size = 1.5) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", ncol = 4) +
  scale_color_manual(values = c("Sorensen" = "darkorchid",
                                "Turnover" = "steelblue",
                                "Nestedness" = "darkorange")) +
  labs(
    title = "Year-to-year beta diversity trends by site (seine net)",
    x = "Year", y = "Dissimilarity",
    color = "Component") +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 9),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom")

# Rank and table
lm_stats_list_site <- beta_trends_site %>%
  group_by(SITE_PARENT_NAME, Component) %>%
  summarise(
    lm_model = list(lm(Dissimilarity ~ Year2)),
    .groups = "drop") %>%
  mutate(
    summary = map(lm_model, summary),
    slope = map_dbl(summary, ~ coef(.x)["Year2", "Estimate"]),
    r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
    p = map_dbl(summary, ~ round(coef(.x)["Year2", "Pr(>|t|)"], 3)),
    Significant = p <= 0.05,
    Direction = case_when(
      Significant & slope > 0 ~ "Increasing",
      Significant & slope < 0 ~ "Decreasing",
      TRUE ~ "Stable")) %>%
  select(SITE_PARENT_NAME, Component, slope, r2, p, Significant, Direction)

beta_trend_summary_site <- lm_stats_list_site %>%
  arrange(Component, Direction, SITE_PARENT_NAME)

beta_trend_summary_site_sig <- beta_trend_summary_site %>%
  filter(Significant == TRUE)

beta_trend_summary_site_sig %>%
  gt(groupname_col = "Component") %>%
  fmt_number(columns = c(slope, r2, p), decimals = 3) %>%
  tab_header(
    title = "Significant beta diversity trends by site (seine net)",
    subtitle = "Only sites with p ≤ 0.05 for Sorensen, Turnover, or Nestedness components") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    slope = "Slope",
    r2 = "R²",
    p = "p-value",
    Direction = "Trend") %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    row.striping.include_table_body = TRUE)

Which dominant species reduce diversity at the site scale?

# Identify dominant species per site-year
dominant_species <- counts_thames_filtered %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME) %>%
  summarise(Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop") %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  mutate(Proportion = Abundance / sum(Abundance)) %>%
  filter(Proportion == max(Proportion))

# Max dominance index per site-year
dominance_index <- dominant_species %>%
  select(EVENT_DATE_YEAR, SITE_PARENT_NAME, Max_Dominance = Proportion)

# Diversity metrics
diversity_metrics_site <- diversity_long_site %>%
  filter(Metric %in% c("Pielou_Evenness", "Species_Richness")) %>%
  pivot_wider(names_from = Metric, values_from = Value)

# Combine with dominant species
dominance_combined <- diversity_metrics_site %>%
  left_join(dominance_index, by = c("EVENT_DATE_YEAR", "SITE_PARENT_NAME")) %>%
  left_join(dominant_species %>% select(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME),
            by = c("EVENT_DATE_YEAR", "SITE_PARENT_NAME"))

# Linear models
lm_evenness <- lm(Pielou_Evenness ~ Max_Dominance + SPECIES_NAME, data = dominance_combined)
lm_richness <- lm(Species_Richness ~ Max_Dominance + SPECIES_NAME, data = dominance_combined)

# Extract significant species
extract_significant_species <- function(model) {
  summary(model)$coefficients %>%
    as.data.frame() %>%
    rownames_to_column("Term") %>%
    filter(grepl("SPECIES_NAME", Term) & `Pr(>|t|)` < 0.05) %>%
    mutate(SPECIES_NAME = gsub("SPECIES_NAME", "", Term)) %>%
    pull(SPECIES_NAME)
}

# Significant species per metric
sig_species_evenness <- extract_significant_species(lm_evenness)
sig_species_richness <- extract_significant_species(lm_richness)
sig_species_all <- union(sig_species_evenness, sig_species_richness)

# Sites where these species were dominant
sig_species_sites <- dominance_combined %>%
  filter(SPECIES_NAME %in% sig_species_all) %>%
  mutate(
    Affects = case_when(
      SPECIES_NAME %in% sig_species_evenness & SPECIES_NAME %in% sig_species_richness ~ "Both",
      SPECIES_NAME %in% sig_species_evenness ~ "Evenness",
      SPECIES_NAME %in% sig_species_richness ~ "Richness",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(Affects))

# Format
sig_species_plot_data <- sig_species_sites %>%
  pivot_longer(cols = c(Pielou_Evenness, Species_Richness),
               names_to = "Metric", values_to = "Value") %>%
  filter(
    (Metric == "Pielou_Evenness" & Affects %in% c("Evenness", "Both")) |
      (Metric == "Species_Richness" & Affects %in% c("Richness", "Both"))
  ) %>%
  mutate(
    Affects = factor(Affects, levels = c("Richness", "Evenness", "Both")),
    Metric = recode(Metric,
                    "Pielou_Evenness" = "Evenness",
                    "Species_Richness" = "Richness")
  )

# Plot
ggplot(sig_species_plot_data,
       aes(x = EVENT_DATE_YEAR, y = reorder(SITE_PARENT_NAME, EVENT_DATE_YEAR))) +
  geom_point(aes(size = Max_Dominance, color = Affects), alpha = 0.8) +
  scale_color_manual(values = c("Richness" = "steelblue", "Evenness" = "firebrick", "Both" = "purple")) +
  facet_grid(Metric ~ SPECIES_NAME, scales = "free", switch = "x") +
  labs(
    title = "Sites dominated by species significantly affecting diversity metrics (seine net)",
    subtitle = "Point size = dominance level | Color = affected metric(s)",
    x = "Year",
    y = "Site",
    color = "Affects",
    size = "Dominance") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.y = element_text(size = 7),
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(face = "bold", size = 10),
    strip.placement = "outside",
    panel.spacing = unit(1, "lines"),
    legend.position = "bottom")

Length data

Data wrangling

EA sampling sites

# Load data
setwd("~/PhD/London NERC DTP documents/EA data/EnvironmentAgency_EcologyFish_DataDownload_2025-04-02")

lengths <- read_csv("TraC_Fish_Individual_Lengths_2025-04-02.csv")
sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")
lengths_clean <- lengths %>% left_join(sites, by = "SITE_ID")

lengths_clean <- lengths_clean %>%
  select(
    SITE_ID,
    SITE_NAME = SITE_NAME.x,
    SITE_PARENT_NAME,
    EVENT_DATE,
    SPECIES_NAME,
    LATIN_NAME,
    FISH_LENGTH,
    TOP_TIER_SITE,
    SITE_RANKED_NGR,
    SITE_RANKED_EASTING,
    SITE_RANKED_NORTHING)

# Convert dates and extract components
lengths_clean <- lengths_clean %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_DAY = day(EVENT_DATE))

# Sampling method
lengths_clean <- lengths_clean %>%
  mutate(
    METHOD = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Thames only
lengths_thames <- lengths_clean %>%
  filter(str_detect(TOP_TIER_SITE, regex("thames", ignore_case = TRUE)))

# Lat and long
sites_sf <- st_as_sf(lengths_thames, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326)
coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]

# Gears
sites_sf <- sites_sf %>%
  mutate(
    GEAR_TYPE = case_when(
      str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
      str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
      str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
      str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
      str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
      str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
      str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
      str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
      TRUE ~ "Unknown"))

# Map
gear_types <- unique(sites_sf$GEAR_TYPE)
gear_pal <- colorFactor(palette = "Set2", domain = gear_types)

leaflet(sites_sf) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    color = ~gear_pal(GEAR_TYPE),
    radius = 6,
    stroke = TRUE, weight = 1, fillOpacity = 0.9,
    popup = ~paste0(
      "<b>Site:</b> ", SITE_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b>Gear:</b> ", GEAR_TYPE), group = ~GEAR_TYPE) %>%
  addLegend(
    position = "bottomright",
    pal = gear_pal,
    values = ~GEAR_TYPE,
    title = "Gear Type",
    opacity = 1) %>%
  addLayersControl(
    overlayGroups = gear_types,
    options = layersControlOptions(collapsed = FALSE),
    position = "topright")
# Sampling method summary
lengths_gear <- lengths_thames %>%
  mutate(
    EVENT_YEAR = lubridate::year(lubridate::ymd(EVENT_DATE))) %>%
  group_by(SPECIES_NAME) %>%
  mutate(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(is_juvenile = FISH_LENGTH <= juvenile_threshold)

gear_summary <- lengths_gear %>%
  group_by(METHOD) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    prop_juvenile = mean(is_juvenile, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop")

gear_summary <- gear_summary %>%
  arrange(desc(prop_juvenile)) %>%
  mutate(METHOD = factor(METHOD, levels = unique(METHOD)))

glm_model <- glm(is_juvenile ~ METHOD, data = lengths_gear, family = "binomial")

p_vals <- tidy(glm_model) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    gear = gsub("METHOD", "", term),
    label = paste0("p = ", signif(p.value, 2)))

ggplot(gear_summary, aes(x = METHOD, y = prop_juvenile)) +
  geom_col(fill = "tomato") +
  labs(
    title = "Proportion of juveniles by sampling method",
    x = "Sampling method", y = "Proportion juvenile") +
  ylim(0, 1) +
  theme_minimal()

# Species per gear
gear_summary <- lengths_thames %>%
  group_by(METHOD) %>%
  summarise(
    species_richness = n_distinct(SPECIES_NAME),
    total_fish = n(),
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    juvenile_proportion = mean(FISH_LENGTH <= quantile(FISH_LENGTH, 0.25, na.rm = TRUE), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(species_richness))

print(gear_summary)

gear_species_table <- lengths_thames %>%
  count(METHOD, SPECIES_NAME) %>%
  pivot_wider(names_from = METHOD, values_from = n, values_fill = 0)

gear_pa <- gear_species_table %>%
  mutate(across(-SPECIES_NAME, ~ as.integer(. > 0)))

gear_cols <- names(gear_pa)[!names(gear_pa) %in% "SPECIES_NAME"]

gear_unique <- gear_pa %>%
  rowwise() %>%
  mutate(unique_to = if (sum(c_across(all_of(gear_cols))) == 1) {
    gear_cols[which(c_across(all_of(gear_cols)) == 1)]
  } else {
    NA_character_
  }) %>%
  ungroup() %>%
  filter(!is.na(unique_to))

unique_counts <- gear_unique %>%
  count(unique_to, name = "unique_species_count") %>%
  arrange(desc(unique_species_count))

ggplot(unique_counts, aes(x = reorder(unique_to, unique_species_count), y = unique_species_count, fill = unique_to)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Number of species unique to each gear type",
    x = "Gear type", y = "Unique species count") +
  theme_minimal()

gear_unique %>%
  arrange(unique_to, SPECIES_NAME)

gear_pa_long <- gear_pa %>%
  pivot_longer(cols = -SPECIES_NAME, names_to = "METHOD", values_to = "present")

ggplot(gear_pa_long, aes(x = METHOD, y = SPECIES_NAME, fill = as.factor(present))) +
  geom_tile(color = "white") +
  scale_fill_manual(values = c("0" = "white", "1" = "steelblue")) +
  labs(
    title = "Species presence across gear types",
    x = "Gear type", y = "Species",
    fill = "Present") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 7))

Focusing on seine net sampling only

lengths_thames_filtered <- lengths_thames %>%
  filter(METHOD %in% c("Seine Net", "Kick Net"))

# Check for NAs or 0-length values
lengths_thames_filtered %>%
  filter(is.na(FISH_LENGTH) | FISH_LENGTH == 0)
# Boxplot for outliers
ggplot(lengths_thames_filtered, aes(x = METHOD, y = FISH_LENGTH)) +
  geom_boxplot(outlier.colour = "red", outlier.shape = 16, outlier.size = 2) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Boxplot of fish lengths by gear type",
    x = "Gear type",
    y = "Fish length (mm)")

ggplot(lengths_thames_filtered, aes(x = TOP_TIER_SITE, y = FISH_LENGTH)) +
  geom_boxplot(outlier.colour = "red") +
  theme_minimal() +
  labs(title = "Fish lengths by estuary zone (seine net)")

# Remove outliers
Q1 <- quantile(lengths_thames_filtered$FISH_LENGTH, 0.25, na.rm = TRUE)
Q3 <- quantile(lengths_thames_filtered$FISH_LENGTH, 0.75, na.rm = TRUE)
IQR_value <- Q3 - Q1

lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

lengths_thames_no_outliers <- lengths_thames_filtered %>%
  filter(FISH_LENGTH >= lower_bound, FISH_LENGTH <= upper_bound)

lengths_thames_filtered <- lengths_thames_no_outliers

ggplot(lengths_thames_filtered, aes(x = FISH_LENGTH)) +
  geom_histogram(bins = 100, fill = "darkorange") +
  labs(title = "Distribution of fish lengths (seine net)", x = "Fish length (mm)", y = "Frequency") +
  theme_minimal()

Are fish getting larger in the Thames Estuary?

#Estuary
length_stats_year <- lengths_thames_filtered %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n()) %>%
  arrange(EVENT_YEAR)

length_stats_species_year <- lengths_thames %>%
  group_by(EVENT_YEAR, SPECIES_NAME) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n()) %>%
  ungroup() %>%
  arrange(EVENT_YEAR, SPECIES_NAME)

# Model
mean_lm <- lm(mean_length ~ EVENT_YEAR, data = length_stats_year)
mean_slope <- coef(summary(mean_lm))["EVENT_YEAR", "Estimate"]
mean_p <- coef(summary(mean_lm))["EVENT_YEAR", "Pr(>|t|)"]

# Model
median_lm <- lm(median_length ~ EVENT_YEAR, data = length_stats_year)
median_slope <- coef(summary(median_lm))["EVENT_YEAR", "Estimate"]
median_p <- coef(summary(median_lm))["EVENT_YEAR", "Pr(>|t|)"]

# Plot
ggplot(length_stats_year, aes(x = EVENT_YEAR)) +
  geom_line(aes(y = mean_length, color = "Mean", linetype = "Mean"), size = 1) +
  geom_point(aes(y = mean_length, color = "Mean")) +
  geom_smooth(aes(y = mean_length, color = "Mean", linetype = "Mean"),
              method = "lm", se = FALSE) +
  
  geom_line(aes(y = median_length, color = "Median", linetype = "Median"), size = 1) +
  geom_point(aes(y = median_length, color = "Median")) +
  geom_smooth(aes(y = median_length, color = "Median", linetype = "Median"),
              method = "lm", se = FALSE) +
  
  scale_color_manual(values = c("Mean" = "blue", "Median" = "darkorange")) +
  scale_linetype_manual(values = c("Mean" = "solid", "Median" = "dashed")) +
  
  labs(
    title = "Mean and median fish length in the Thames Estuary (seine net)",
    subtitle = paste0(
      "Mean slope = ", round(mean_slope, 2), " mm/year (p = ", signif(mean_p, 3), "); ",
      "Median slope = ", round(median_slope, 2), " mm/year (p = ", signif(median_p, 3), ")"),
    x = "Year",
    y = "Fish length (mm)",
    color = "Statistic",
    linetype = "Statistic") +
  theme_minimal() +
  theme(legend.position = "top")

# Zone
length_stats_tier <- lengths_thames_filtered %>%
  group_by(EVENT_YEAR, TOP_TIER_SITE) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    median_length = median(FISH_LENGTH, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop") %>%
  arrange(EVENT_YEAR, TOP_TIER_SITE)

length_labels <- length_stats_tier %>%
  nest(data = -TOP_TIER_SITE) %>%
  mutate(
    model_mean = map(data, ~ lm(mean_length ~ EVENT_YEAR, data = .x)),
    model_median = map(data, ~ lm(median_length ~ EVENT_YEAR, data = .x)),
    stats_mean = map(model_mean, tidy),
    stats_median = map(model_median, tidy)) %>%
  transmute(
    TOP_TIER_SITE,
    label = pmap(list(TOP_TIER_SITE, stats_mean, stats_median), function(zone, m, md) {
      slope_mean <- m[m$term == "EVENT_YEAR", "estimate"]
      p_mean <- m[m$term == "EVENT_YEAR", "p.value"]
      slope_median <- md[md$term == "EVENT_YEAR", "estimate"]
      p_median <- md[md$term == "EVENT_YEAR", "p.value"]
      paste0(
        zone, "\n",
        "Mean: ", round(slope_mean, 2), " mm/year (p=", signif(p_mean, 2), ")\n",
        "Median: ", round(slope_median, 2), " mm/year (p=", signif(p_median, 2), ")")})) %>%
  unnest(label)

length_stats_tier$TOP_TIER_SITE <- factor(length_stats_tier$TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower"))

ordered_levels <- c("Thames Upper", "Thames Middle", "Thames Lower")
length_labels$TOP_TIER_SITE <- factor(length_labels$TOP_TIER_SITE, levels = ordered_levels)
length_stats_tier$TOP_TIER_SITE <- factor(length_stats_tier$TOP_TIER_SITE, levels = ordered_levels)

# Plot
ggplot(length_stats_tier, aes(x = EVENT_YEAR)) +
  geom_line(aes(y = mean_length, color = "Mean", linetype = "Mean"), size = 1) +
  geom_point(aes(y = mean_length, color = "Mean")) +
  geom_smooth(aes(y = mean_length, color = "Mean", linetype = "Mean"), method = "lm", se = FALSE) +
  
  geom_line(aes(y = median_length, color = "Median", linetype = "Median"), size = 1) +
  geom_point(aes(y = median_length, color = "Median")) +
  geom_smooth(aes(y = median_length, color = "Median", linetype = "Median"), method = "lm", se = FALSE) +
  
  scale_color_manual(values = c("Mean" = "blue", "Median" = "darkorange")) +
  scale_linetype_manual(values = c("Mean" = "solid", "Median" = "dashed")) +
  
  facet_wrap(~TOP_TIER_SITE, labeller = as_labeller(deframe(length_labels)), scales = "free_y") +
  
  labs(
    title = "Mean and median fish length by estuary zone (seine net)",
    x = "Year",
    y = "Fish length (mm)",
    color = "Statistic",
    linetype = "Statistic") +
  theme_minimal() +
  theme(legend.position = "top")

Are there any changes in juvenile arrival?

lengths_juveniles <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE),
    is_juvenile = FISH_LENGTH <= juvenile_threshold) %>%
  ungroup()

first_juvenile_estuary <- lengths_juveniles %>%
  filter(is_juvenile, !is.na(EVENT_DATE)) %>%
  mutate(JULIAN_DAY = yday(lubridate::ymd(EVENT_DATE))) %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    first_julian_day = min(JULIAN_DAY, na.rm = TRUE),
    .groups = "drop")

model_julian <- lm(first_julian_day ~ EVENT_YEAR, data = first_juvenile_estuary)
model_summary <- summary(model_julian)
slope <- round(coef(model_julian)["EVENT_YEAR"], 2)
pval <- signif(coef(summary(model_julian))["EVENT_YEAR", "Pr(>|t|)"], 3)

ggplot(first_juvenile_estuary, aes(x = EVENT_YEAR, y = first_julian_day)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "steelblue", size = 2) +
  geom_smooth(method = "lm", color = "black", linetype = "dashed", se = TRUE) +
  scale_x_continuous(breaks = seq(min(first_juvenile_estuary$EVENT_YEAR), max(first_juvenile_estuary$EVENT_YEAR), by = 10)) +
  labs(
    title = "Timing of first juvenile appearance in the Thames Estuary (seine net)",
    subtitle = paste0("Slope = ", slope, " days/year, p = ", pval),
    x = "Year",
    y = "Julian Day (first appearance)") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 11))

Is there a sampling date bias?

juvenile_thresholds <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  summarise(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE),
    .groups = "drop")

lengths_tagged <- lengths_thames_filtered %>%
  left_join(juvenile_thresholds, by = "SPECIES_NAME") %>%
  mutate(
    is_juvenile = FISH_LENGTH <= juvenile_threshold)

first_juvenile <- lengths_tagged %>%
  filter(is_juvenile, !is.na(EVENT_DATE)) %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    first_juvenile_day = min(yday(EVENT_DATE)),
    .groups = "drop")

first_sampling <- lengths_thames_filtered %>%
  filter(!is.na(EVENT_DATE)) %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    first_sample_day = min(yday(EVENT_DATE)),
    .groups = "drop")

juvenile_vs_sampling <- left_join(first_juvenile, first_sampling, by = "EVENT_YEAR") %>%
  pivot_longer(cols = c(first_juvenile_day, first_sample_day),
               names_to = "event_type",
               values_to = "julian_day") %>%
  mutate(event_type = recode(event_type,
                             first_juvenile_day = "First juvenile",
                             first_sample_day = "First sample"))

ggplot(juvenile_vs_sampling, aes(x = EVENT_YEAR, y = julian_day, color = event_type)) +
  geom_point(size = 2) +
  geom_line() +
  geom_smooth(method = "lm", se = TRUE, linetype = "dashed") +
  scale_color_manual(values = c("First juvenile" = "blue", "First sample" = "gray40")) +
  labs(
    title = "Comparison of first juvenile appearance vs first sampling date (seine net)",
    x = "Year",
    y = "Julian day",
    color = "Event type") +
  theme_minimal()

Are there any changes in juvenile arrival by zone?

lengths_juveniles <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(
    juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE),
    is_juvenile = FISH_LENGTH <= juvenile_threshold) %>%
  ungroup()

first_juvenile_julian <- lengths_juveniles %>%
  filter(is_juvenile, !is.na(EVENT_DATE), !is.na(TOP_TIER_SITE)) %>%
  mutate(JULIAN_DAY = yday(lubridate::ymd(EVENT_DATE))) %>%
  group_by(EVENT_YEAR, TOP_TIER_SITE) %>%
  summarise(
    first_julian_day = min(JULIAN_DAY, na.rm = TRUE),
    .groups = "drop")

julian_models <- first_juvenile_julian %>%
  group_by(TOP_TIER_SITE) %>%
  do(tidy(lm(first_julian_day ~ EVENT_YEAR, data = .))) %>%
  filter(term == "EVENT_YEAR") %>%
  mutate(
    slope = round(estimate, 2),
    pval = signif(p.value, 3),
    facet_label = paste0(TOP_TIER_SITE, "\nSlope = ", slope, " days/year, p = ", pval)) %>%
  select(TOP_TIER_SITE, facet_label)

first_juvenile_labeled <- first_juvenile_julian %>%
  left_join(julian_models, by = "TOP_TIER_SITE") %>%
  mutate(
    TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower")),
    facet_label = factor(facet_label, levels = julian_models$facet_label[match(c("Thames Upper", "Thames Middle", "Thames Lower"), julian_models$TOP_TIER_SITE)]))

ggplot(first_juvenile_labeled, aes(x = EVENT_YEAR, y = first_julian_day)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "steelblue", size = 2) +
  geom_smooth(method = "lm", color = "black", linetype = "dashed", se = TRUE) +
  facet_wrap(~ facet_label, scales = "free_y") +
  scale_x_continuous(breaks = seq(min(first_juvenile_labeled$EVENT_YEAR), max(first_juvenile_labeled$EVENT_YEAR), by = 10)) +
  labs(
    title = "Timing of first juvenile appearance by estuary zone (seine net)",
    x = "Year", y = "Julian Day (first appearance)") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 11),
    legend.position = "none")

# Which species?
lengths_tagged <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    is_juvenile = FISH_LENGTH <= juvenile_threshold,
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE),
    julian_day = yday(EVENT_DATE))

first_juvenile_per_species <- lengths_tagged %>%
  filter(is_juvenile, !is.na(TOP_TIER_SITE)) %>%
  group_by(SPECIES_NAME, EVENT_YEAR, TOP_TIER_SITE) %>%
  summarise(
    first_day = min(julian_day, na.rm = TRUE),
    n_juveniles = n(),
    .groups = "drop") %>%
  filter(n_juveniles >= 5)

species_enough_years <- first_juvenile_per_species %>%
  group_by(SPECIES_NAME, TOP_TIER_SITE) %>%
  summarise(n_years = n(), .groups = "drop") %>%
  filter(n_years >= 5)

first_juvenile_filtered <- first_juvenile_per_species %>%
  semi_join(species_enough_years, by = c("SPECIES_NAME", "TOP_TIER_SITE"))

recruitment_trends <- first_juvenile_filtered %>%
  group_by(SPECIES_NAME, TOP_TIER_SITE) %>%
  do(tidy(lm(first_day ~ EVENT_YEAR, data = .))) %>%
  ungroup() %>%
  filter(term == "EVENT_YEAR") %>%
  mutate(
    slope = round(estimate, 2),
    pval = signif(p.value, 3),
    trend = case_when(
      pval < 0.05 & slope < 0 ~ "Earlier (📉)",
      pval < 0.05 & slope > 0 ~ "Later (📈)",
      TRUE ~ "No significant trend"),
    label = paste0(SPECIES_NAME, "\nSlope = ", slope, ", p = ", pval))

recruitment_plot_data <- first_juvenile_filtered %>%
  inner_join(recruitment_trends %>% filter(pval < 0.05),
             by = c("SPECIES_NAME", "TOP_TIER_SITE"))

recruitment_plot_data$TOP_TIER_SITE <- factor(
  recruitment_plot_data$TOP_TIER_SITE,
  levels = c("Thames Upper", "Thames Middle", "Thames Lower"))

ggplot(recruitment_plot_data, aes(x = EVENT_YEAR, y = first_day)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed") +
  facet_grid(TOP_TIER_SITE ~ label, scales = "free_y") +
  labs(
    title = "Species-specific timing of first juvenile appearance (seine net)",
    subtitle = "Only significant trends (p < 0.05)",
    x = "Year", y = "Julian Day (first appearance)") +
  theme_minimal() +
  theme(strip.text = element_text(size = 9))

Are there any changes in growth rate?

seasonal_growth <- lengths_tagged %>%
  filter(is_juvenile, !is.na(EVENT_DATE)) %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE)
  ) %>%
  group_by(EVENT_YEAR, EVENT_MONTH) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    n = n(),
    .groups = "drop")

growth_rates <- seasonal_growth %>%
  group_by(EVENT_YEAR) %>%
  summarise(
    growth_model = list(lm(mean_length ~ EVENT_MONTH)),
    .groups = "drop") %>%
  mutate(
    slope_mm_per_month = map_dbl(growth_model, ~ coef(.x)[2]))

growth_trend <- lm(slope_mm_per_month ~ EVENT_YEAR, data = growth_rates)
growth_summary <- tidy(growth_trend)

growth_slope <- round(growth_summary$estimate[2], 3)
growth_p <- signif(growth_summary$p.value[2], 3)

ggplot(growth_rates, aes(x = EVENT_YEAR, y = slope_mm_per_month)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, linetype = "dashed", color = "darkblue") +
  labs(
    title = paste0("Seasonal growth rate over time (seine net)\nSlope = ", growth_slope, " mm/mo/yr, p = ", growth_p),
    x = "Year", y = "Estimated growth rate (mm/month)") +
  theme_minimal()

# Are there any changes in growth rate by zone?
growth_by_zone <- lengths_tagged %>%
  filter(is_juvenile, !is.na(EVENT_DATE), !is.na(TOP_TIER_SITE)) %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    EVENT_YEAR = year(EVENT_DATE)
  ) %>%
  group_by(TOP_TIER_SITE, EVENT_YEAR, EVENT_MONTH) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  group_by(TOP_TIER_SITE, EVENT_YEAR) %>%
  summarise(
    slope_mm_per_month = coef(lm(mean_length ~ EVENT_MONTH))[2],
    .groups = "drop"
  )

growth_trends <- growth_by_zone %>%
  group_by(TOP_TIER_SITE) %>%
  do(tidy(lm(slope_mm_per_month ~ EVENT_YEAR, data = .))) %>%
  filter(term == "EVENT_YEAR") %>%
  mutate(
    slope_label = paste0("Slope = ", round(estimate, 2), " mm/mo/yr\np = ", signif(p.value, 2))
  )

zone_labels <- growth_trends %>%
  select(TOP_TIER_SITE, slope_label) %>%
  mutate(
    facet_label = paste0(TOP_TIER_SITE, "\n", slope_label),
    TOP_TIER_SITE = fct_relevel(TOP_TIER_SITE, "Thames Upper", "Thames Middle", "Thames Lower")
  )

growth_by_zone_labeled <- growth_by_zone %>%
  left_join(zone_labels, by = "TOP_TIER_SITE")

growth_by_zone_labeled <- growth_by_zone_labeled %>%
  mutate(
    facet_label = fct_relevel(facet_label, 
                              zone_labels$facet_label[match(c("Thames Upper", "Thames Middle", "Thames Lower"),
                                                            zone_labels$TOP_TIER_SITE)]))

ggplot(growth_by_zone_labeled, aes(x = EVENT_YEAR, y = slope_mm_per_month)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, linetype = "dashed", color = "blue") +
  facet_wrap(~ facet_label, scales = "free_y") +
  labs(
    title = "Seasonal growth rate over time by estuary zone (seine net)",
    x = "Year", y = "Growth rate (mm/month)"
  ) +
  theme_minimal()

Are there seasonal differences in fish length?

## Nesting season within zones and sites
lengths_seasonal <- lengths_thames_filtered %>%
  mutate(
    EVENT_DATE = ymd(EVENT_DATE),
    EVENT_MONTH = month(EVENT_DATE),
    season = case_when(
      EVENT_MONTH %in% 1:6 ~ "Early",
      EVENT_MONTH %in% 7:12 ~ "Late")) %>%
  filter(!is.na(season))

#Estuary
model_lsmean_total <- lm(FISH_LENGTH ~ season + EVENT_YEAR, data = lengths_seasonal)

lsmeans_total <- emmeans(model_lsmean_total, ~ season) %>%
  as.data.frame()

ggplot(lsmeans_total, aes(x = season, y = emmean, fill = season)) +
  geom_col() +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  labs(title = "Fish length by season (seine net)") +
  theme_minimal()

Do these seasonal differences vary by zone or site?

# By zone
model_lsmean_zone <- lm(FISH_LENGTH ~ season * TOP_TIER_SITE + EVENT_YEAR, data = lengths_seasonal)

lsmeans_zone <- emmeans(model_lsmean_zone, ~ season * TOP_TIER_SITE) %>%
  as.data.frame()

ggplot(lsmeans_zone, aes(x = season, y = emmean, fill = season)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  facet_wrap(~ TOP_TIER_SITE, scales = "free_y") +
  labs(title = "Fish length by season and zone (seine net)") +
  theme_minimal()

# By site
model_lsmean_site <- lm(FISH_LENGTH ~ season * SITE_PARENT_NAME + EVENT_YEAR, data = lengths_seasonal)

lsmeans_site <- emmeans(model_lsmean_site, ~ season * SITE_PARENT_NAME) %>%
  as.data.frame()

ggplot(lsmeans_site, aes(x = season, y = emmean, fill = season)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), width = 0.2) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y") +
  labs(title = "Fish length by season and site (seine net)") +
  theme_minimal()

Which species show significant seasonal shifts in size?

#Estuary
species_season_diffs <- lengths_seasonal %>%
  group_by(SPECIES_NAME) %>%
  filter(n_distinct(season) >= 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(estimate_diff = estimate, se = SE, pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  coord_flip() +
  labs(title = "Significant seasonal shifts in fish length by species (seine net)") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 7),
    legend.position = "top")

# Zone
species_season_diffs_zone <- lengths_seasonal %>%
  filter(!is.na(TOP_TIER_SITE)) %>%
  group_by(SPECIES_NAME, TOP_TIER_SITE) %>%
  filter(n_distinct(season) == 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(
    estimate_diff = estimate,
    se = SE,
    pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs_zone %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  facet_wrap(~ TOP_TIER_SITE, scales = "free_y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  coord_flip() +
  labs(
    title = "Significant seasonal fish length shifts by species and zone (seine net)",
    subtitle = "LSMean length difference (Late − Early), p < 0.05",
    x = "Species",
    y = "Length shift (mm)",
    fill = "Direction") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 10),
    axis.text.y = element_text(size = 7),
    legend.position = "top")

# Site
species_season_diffs_site <- lengths_seasonal %>%
  filter(!is.na(SITE_PARENT_NAME)) %>%
  group_by(SPECIES_NAME, SITE_PARENT_NAME) %>%
  filter(n_distinct(season) == 2, n_distinct(EVENT_YEAR) >= 2) %>%
  nest() %>%
  mutate(
    model = map(data, ~ lm(FISH_LENGTH ~ season + EVENT_YEAR, data = .x)),
    emmeans = map(model, ~ emmeans(.x, ~ season)),
    contrast = map(emmeans, ~ contrast(.x, method = "revpairwise") %>% as.data.frame())) %>%
  unnest(contrast) %>%
  rename(
    estimate_diff = estimate,
    se = SE,
    pval = p.value) %>%
  mutate(
    abs_diff = abs(estimate_diff),
    direction = ifelse(estimate_diff > 0, "Late > Early", "Early > Late"),
    sig = ifelse(pval < 0.05, "Yes", "No"))

ggplot(species_season_diffs_site %>% filter(sig == "Yes"),
       aes(x = reorder(SPECIES_NAME, estimate_diff), y = estimate_diff, fill = direction)) +
  geom_col() +
  geom_errorbar(aes(ymin = estimate_diff - se, ymax = estimate_diff + se), width = 0.2) +
  facet_wrap(~ SITE_PARENT_NAME, scales = "free_y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  coord_flip() +
  labs(
    title = "Significant seasonal fish length shifts by site and species",
    subtitle = "LSMean length difference (Late − Early), p < 0.05",
    x = "Species",
    y = "Length shift (mm)",
    fill = "Direction") +
  scale_fill_manual(values = c("Late > Early" = "steelblue", "Early > Late" = "tomato")) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 9),
    axis.text.y = element_text(size = 6),
    legend.position = "top")

Functional guilds

# Load data
guilds <- read_csv("species_list_guilds.csv")

# Summarise
e_guild_species_year <- guilds %>%
  group_by(EVENT_DATE_YEAR, ECOLOGY_GUILD) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Compute diversity per guild per year
e_guild_trends <- e_guild_species_year %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Guild_Richness = n_distinct(ECOLOGY_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape 
e_guild_long <- e_guild_trends %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR), names_to = "Metric", values_to = "Value")

# Plot 
ggplot(e_guild_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Ecology guild-level diversity metrics over time - Thames Estuary",
    subtitle = "LOESS smoothed trends grouped by ecological guild",
    x = "Year", y = "Metric value", color = "Ecology Guild") +
  theme_minimal()

# GLMs/LMMs for each metric
e_glm_richness   <- glm(Guild_Richness ~ EVENT_DATE_YEAR, data = e_guild_trends, family = poisson())
e_lm_abundance   <- lm(Guild_Abundance ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = e_guild_trends)
e_lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = e_guild_trends)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
e_results <- bind_rows(
  summary_table(e_glm_richness,   "Guild Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(e_lm_shannon,     "Shannon Diversity"),
  summary_table(e_lm_simpson,     "Simpson Diversity"),
  summary_table(e_lm_margalef,    "Margalef Richness"),
  summary_table(e_lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(e_results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in ecology guild diversity metrics over time - Thames Estuary",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

# Where are the hotspots of ecological change?
e_guild_species_site <- guilds %>%
  group_by(EVENT_DATE_YEAR, ECOLOGY_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

e_guild_trends_site <- e_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(ECOLOGY_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

e_guild_trends_site_long <- e_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
e_guild_site_results <- e_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

e_guild_site_results <- e_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(e_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in ecology guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Summarise
f_guild_species_year <- guilds %>%
  group_by(EVENT_DATE_YEAR, FEEDING_GUILD) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Compute diversity per guild per year
f_guild_trends <- f_guild_species_year %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Guild_Richness = n_distinct(FEEDING_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape 
f_guild_long <- f_guild_trends %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR), names_to = "Metric", values_to = "Value")

# Plot 
ggplot(f_guild_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Feeding guild-level diversity metrics over time - Thames Estuary",
    subtitle = "LOESS smoothed trends grouped by ecological guild",
    x = "Year", y = "Metric value", color = "Ecology Guild") +
  theme_minimal()

# GLMs/LMMs for each metric
f_glm_richness   <- glm(Guild_Richness ~ EVENT_DATE_YEAR, data = f_guild_trends, family = poisson())
f_lm_abundance   <- lm(Guild_Abundance ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = f_guild_trends)
f_lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = f_guild_trends)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
f_results <- bind_rows(
  summary_table(f_glm_richness,   "Guild Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(f_lm_shannon,     "Shannon Diversity"),
  summary_table(f_lm_simpson,     "Simpson Diversity"),
  summary_table(f_lm_margalef,    "Margalef Richness"),
  summary_table(f_lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(f_results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in feeding guild diversity metrics over time - Thames Estuary",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

# Where are the hotspots of ecological change?
f_guild_species_site <- guilds %>%
  group_by(EVENT_DATE_YEAR, FEEDING_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

f_guild_trends_site <- f_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(FEEDING_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

f_guild_trends_site_long <- f_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
f_guild_site_results <- f_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

f_guild_site_results <- f_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(f_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in feeding guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Summarise
r_guild_species_year <- guilds %>%
  group_by(EVENT_DATE_YEAR, REPRODUCTIVE_GUILD) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

# Compute diversity per guild per year
r_guild_trends <- r_guild_species_year %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Guild_Richness = n_distinct(REPRODUCTIVE_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop") %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))

# Reshape 
r_guild_long <- r_guild_trends %>%
  pivot_longer(cols = -c(EVENT_DATE_YEAR), names_to = "Metric", values_to = "Value")

# Plot 
ggplot(r_guild_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1) +  
  geom_smooth(method = "loess", se = TRUE, linewidth = 1) + 
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Reproductive guild-level diversity metrics over time - Thames Estuary",
    subtitle = "LOESS smoothed trends grouped by ecological guild",
    x = "Year", y = "Metric value", color = "Ecology Guild") +
  theme_minimal()

# GLMs/LMMs for each metric
r_glm_richness   <- glm(Guild_Richness ~ EVENT_DATE_YEAR, data = r_guild_trends, family = poisson())
r_lm_abundance   <- lm(Guild_Abundance ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_shannon     <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_simpson     <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_margalef    <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = r_guild_trends)
r_lm_evenness    <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = r_guild_trends)

# Extract slope and p-values
summary_table <- function(model, metric_name) {
  coef_summary <- summary(model)$coefficients
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
  p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ "")
  data.frame(
    Metric = metric_name,
    Slope = slope,
    SE = se,
    CI_low = ci_low,
    CI_high = ci_high,
    P_value = round(p, 4),
    Significance = sig
  )
}

# Summary table
r_results <- bind_rows(
  summary_table(r_glm_richness,   "Guild Richness"),
  #summary_table(lm_abundance,   "Total Abundance"),
  summary_table(r_lm_shannon,     "Shannon Diversity"),
  summary_table(r_lm_simpson,     "Simpson Diversity"),
  summary_table(r_lm_margalef,    "Margalef Richness"),
  summary_table(r_lm_evenness,    "Pielou Evenness"))

# Plot
ggplot(r_results, aes(x = Slope, y = reorder(Metric, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_point(size = 3, color = "tomato") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
  geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
  labs(
    title = "Trend in reproductive guild diversity metrics over time - Thames Estuary",
    subtitle = "Slope ± 95% CI | GLM or LM by metric",
    x = "Slope (Change per year)",
    y = "Diversity metric",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
  theme_minimal()

# Where are the hotspots of ecological change?
r_guild_species_site <- guilds %>%
  group_by(EVENT_DATE_YEAR, REPRODUCTIVE_GUILD, SITE_PARENT_NAME) %>%
  summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")

r_guild_trends_site <- r_guild_species_site %>%
  group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
  summarise(
    Guild_Richness = n_distinct(REPRODUCTIVE_GUILD),
    Guild_Abundance = sum(FISH_COUNT),
    Shannon_Diversity = diversity(FISH_COUNT, index = "shannon"),
    Simpson_Diversity = diversity(FISH_COUNT, index = "simpson"),
    .groups = "drop"
  ) %>%
  mutate(
    Margalef_Richness = (Guild_Richness - 1) / log(Guild_Abundance),
    Pielou_Evenness = Shannon_Diversity / log(Guild_Richness),
    EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR))
  )

r_guild_trends_site_long <- r_guild_trends_site %>%
  pivot_longer(
    cols = c(Guild_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Guild_Abundance),
    names_to = "Metric", values_to = "Value"
  )

# Reusable model function (no change here)
site_model_summary <- function(df) {
  empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
                         P_value = NA_real_, Significance = NA_character_)
  if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
    return(empty_result)
  }
  model <- tryCatch(
    lm(Value ~ EVENT_DATE_YEAR, data = df),
    error = function(e) return(NULL)
  )
  if (is.null(model)) return(empty_result)
  coef_summary <- summary(model)$coefficients
  if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
  slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
  se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
  p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
  ci_low <- slope - 1.96 * se
  ci_high <- slope + 1.96 * se
  sig <- case_when(
    p <= 0.001 ~ "***",
    p <= 0.01  ~ "**",
    p <= 0.05  ~ "*",
    TRUE       ~ ""
  )
  
  tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}

# Apply models per site × metric
r_guild_site_results <- r_guild_trends_site_long %>%
  filter(!is.na(Value)) %>%
  group_by(SITE_PARENT_NAME, Metric) %>%
  group_modify(~ site_model_summary(.x)) %>%
  ungroup()

r_guild_site_results <- r_guild_site_results %>%
  filter(!is.na(Significance))

ggplot(r_guild_site_results, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
  geom_point(aes(color = Significance), size = 2) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
  facet_wrap(~ Metric, scales = "free_x") +
  labs(
    title = "Change in reproductive guild-level diversity metrics over time by site",
    subtitle = "Slope ± 95% CI | Linear model per site & metric",
    x = "Slope (Change per year)",
    y = "Site",
    caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
    color = "Significance") +
  theme_minimal()

# Functional redundancy

# Replace missing guild values to allow safe concatenation
guilds <- guilds %>%
  mutate(across(
    c(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD),
    ~replace_na(., "NA")
  ))

# Create functional entity by combining all guilds
guilds <- guilds %>%
  mutate(FUNCTIONAL_ENTITY = paste(ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD, sep = "_"))

# Count distinct species per functional entity per year
redundancy <- guilds %>%
  group_by(EVENT_DATE_YEAR, FUNCTIONAL_ENTITY) %>%
  summarise(
    Species_Richness = n_distinct(LATIN_NAME),
    Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
    .groups = "drop"
  )

# Summarise 
redundancy_summary <- redundancy %>%
  group_by(EVENT_DATE_YEAR) %>%
  summarise(
    Distinct_Functions = n_distinct(FUNCTIONAL_ENTITY),
    Redundant_Functions = sum(Species_Richness > 1),
    Mean_Species_per_Function = mean(Species_Richness),
    Total_Species = sum(Species_Richness),
    .groups = "drop"
  )

# Pivot 
redundancy_long <- redundancy_summary %>%
  pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Metric", values_to = "Value")

# Plot 
ggplot(redundancy_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1, color = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, linewidth = 1, color = "tomato") +
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Functional redundancy metrics over time - Thames Estuary",
    subtitle = "Based on combinations of ecological, feeding, vertical and reproductive guilds",
    x = "Year", y = "Metric value"
  ) +
  theme_minimal()

# Build species × trait binary matrix 
traits_matrix <- guilds %>%
  filter(!is.na(LATIN_NAME)) %>%                          
  select(LATIN_NAME, ECOLOGY_GUILD, FEEDING_GUILD, VERTICAL_DIST_GUILD, REPRODUCTIVE_GUILD) %>%
  distinct() %>%
  mutate(across(everything(), as.factor)) %>%
  pivot_longer(cols = -LATIN_NAME, names_to = "TraitType", values_to = "TraitValue") %>%
  mutate(Present = 1) %>%
  pivot_wider(names_from = c(TraitType, TraitValue), values_from = Present, values_fill = 0) %>%
  filter(!is.na(LATIN_NAME)) %>%                          
  column_to_rownames("LATIN_NAME")

abundance_matrix <- guilds %>%
  group_by(EVENT_DATE_YEAR, LATIN_NAME) %>%
  summarise(Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = LATIN_NAME, values_from = Abundance, values_fill = 0) %>%
  column_to_rownames("EVENT_DATE_YEAR")

shared_species <- intersect(rownames(traits_matrix), colnames(abundance_matrix))

# Filter 
traits_matrix_filtered <- traits_matrix[shared_species, , drop = FALSE]
abundance_matrix_filtered <- abundance_matrix[, shared_species, drop = FALSE]

fd_results <- dbFD(
  x = traits_matrix_filtered,
  a = abundance_matrix_filtered,
  calc.FRic = TRUE,
  m = "min",           
  stand.x = TRUE)
## FRic: Dimensionality reduction was required. The last 14 PCoA axes (out of 16 in total) were removed.
## FRic: Quality of the reduced-space representation = 0.3169416
# Convert to dataframe 
fd_df <- as.data.frame(fd_results[c("FRic", "FEve", "FDiv")])
fd_df$EVENT_DATE_YEAR <- as.numeric(rownames(fd_df))

fd_long <- fd_df %>%
  pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Index", values_to = "Value")

# Plot
ggplot(fd_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
  geom_line(linewidth = 1, color = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, color = "tomato") +
  facet_wrap(~ Index, scales = "free_y") +
  labs(
    title = "Functional diversity indices - Thames Estuary",
    subtitle = "FRic (richness), FEve (evenness), FDiv (divergence)",
    x = "Year", y = "Value"
  ) +
  theme_minimal()

Sites overview

Which are the common species across sites?

top_common_species <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(SITE_PARENT_NAME)) %>%
  count(SPECIES_NAME, SITE_PARENT_NAME) %>%
  group_by(SPECIES_NAME) %>%
  summarise(n_sites = n_distinct(SITE_PARENT_NAME), .groups = "drop") %>%
  arrange(desc(n_sites)) %>%
  pull(SPECIES_NAME)

site_species_list <- counts_thames_filtered %>%
  filter(SPECIES_NAME %in% top_common_species) %>%
  distinct(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_by(SITE_PARENT_NAME) %>%
  summarise(
    species_list = paste(sort(unique(SPECIES_NAME)), collapse = ", "),
    n_species = n(),
    .groups = "drop")

site_species_coords <- site_species_list %>%
  left_join(
    sites %>% distinct(SITE_PARENT_NAME, TOP_TIER_SITE, SITE_RANKED_EASTING, SITE_RANKED_NORTHING),
    by = "SITE_PARENT_NAME") %>%
  filter(!is.na(SITE_RANKED_EASTING))

site_species_sf <- site_species_coords %>%
  st_as_sf(coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(4326)

site_species_sf$lon <- st_coordinates(site_species_sf)[, 1]
site_species_sf$lat <- st_coordinates(site_species_sf)[, 2]

leaflet(site_species_sf) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    radius = ~scales::rescale(n_species, to = c(4, 10)),
    fillColor = ~colorNumeric("Blues", domain = site_species_sf$n_species)(n_species),
    fillOpacity = 0.8,
    stroke = TRUE,
    color = "black",
    popup = ~paste0(
      "<b>Site:</b> ", SITE_PARENT_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b>Species Detected:</b> ", n_species, " / ", length(top_common_species), "<br>",
      "<b>Species List:</b><br>", species_list)) %>%
  addLegend(
    position = "bottomright",
    pal = colorNumeric("Blues", domain = site_species_sf$n_species),
    values = ~n_species,
    title = "Top Species Detected",
    opacity = 1)
site_species_table <- site_species_coords %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE, n_species, species_list) %>%
  arrange(desc(n_species)) %>%
  gt() %>%
  tab_header(
    title = "Top common species detected per site",
    subtitle = paste0("Based on presence of top ", length(top_common_species), " most widespread species")
  ) %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE = "Estuary Zone",
    n_species = "No. of Top Species",
    species_list = "Species Detected"
  ) %>%
  fmt_number(columns = n_species, decimals = 0) %>%
  tab_options(
    table.font.size = "small",
    data_row.padding = px(4),
    column_labels.font.weight = "bold"
  ) %>%
  data_color(
    columns = n_species,
    colors = scales::col_numeric(palette = "Blues", domain = NULL)
  )

site_species_table
Top common species detected per site
Based on presence of top 58 most widespread species
Site Estuary Zone No. of Top Species Species Detected
West Thurrock Thames Middle 31 3-spined stickleback, 5-bearded rockling, Big scaled sand smelt, Cod, Common bream, Common goby, Dab, Dover sole, European eel, Flounder, Goby sp., Golden grey mullet, Greater pipefish, Grey mullet sp., Herring, Hooknose / Pogge, Lesser (Nillsons) pipefish, Long-spined sea scorpion, Perch, Plaice, Red mullet, Roach, Sand goby, Sand smelt, Sandeel sp., Sea bass, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet, Transparent goby
Richmond Thames Upper 29 3-spined stickleback, Atlantic salmon, Barbel, Big scaled sand smelt, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common carp varieties, Common goby, Dace, European eel, European eels > elvers, Flounder, Goby sp., Gudgeon, Minnow, Mirror carp, Perch, Roach, Roach x common bream hybrid, Rudd, Sand goby, Sand smelt, Sea bass, Smelt, Stickleback sp., Thin lipped grey mullet
Greenwich Thames Middle 28 3-spined stickleback, Big scaled sand smelt, Brown / sea trout, Chub, Common bream, Common goby, Dace, Dover sole, European eel, European eels > elvers, Flounder, Gilthead bream, Grey mullet sp., Herring, Perch, Pike, Roach, Roach x common bream hybrid, Rudd, Sand goby, Sand smelt, Sea bass, Short-snouted seahorse, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet, Zander
Kew Thames Upper 27 3-spined stickleback, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common goby, Dace, European eel, European eels > elvers, European elvers, Flounder, Goby sp., Gudgeon, Mirror carp, Perch, Pike, Roach, Roach x common bream hybrid, Sand goby, Sand smelt, Sea bass, Smelt, Stone loach, Thick lipped grey mullet, Thin lipped grey mullet, Zander
Battersea Thames Upper 26 3-spined stickleback, Big scaled sand smelt, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common goby, Dace, European eel, European eels > elvers, European elvers, Flounder, Goby sp., Gudgeon, Perch, Roach, Roach x common bream hybrid, Rudd, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thin lipped grey mullet, Zander
Chiswick Thames Upper 25 3-spined stickleback, Bleak, Brown / sea trout, Bullhead, Chub, Common bream, Common goby, Dace, European eel, European eels > elvers, Flounder, Goby sp., Grey mullet sp., Minnow, Perch, Pike, Roach, Roach x common bream hybrid, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Stone loach, Thin lipped grey mullet
Denton Wharf Thames Middle 18 Anchovy, Common goby, Dover sole, European eel, European eels > elvers, Flounder, Greater pipefish, Grey mullet sp., Herring, Herring like sp., Plaice, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thin lipped grey mullet, Transparent goby
Hammersmith Thames Upper 18 10-spined stickleback, 3-spined stickleback, Bleak, Common bream, Common carp varieties, Common goby, Dace, European eel, Flounder, Perch, Roach, Roach x common bream hybrid, Sand goby, Sand smelt, Sea bass, Smelt, Tench varieties, Thin lipped grey mullet
Greenhithe Thames Middle 17 3-bearded rockling, 3-spined stickleback, Common goby, Dover sole, European eel, Flounder, Herring, Long-spined sea scorpion, Perch, Plaice, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet
Brentford Thames Upper 16 3-spined stickleback, Bleak, Brown / sea trout, Chub, Common bream, Common goby, Dace, European eel, Flounder, Gudgeon, Perch, Roach, Sand smelt, Sea bass, Smelt, Thin lipped grey mullet
Stanford-le-Hope Beach Thames Lower 15 Anchovy, Common goby, Dover sole, European eel, Flounder, Herring, Painted goby, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thick lipped grey mullet, Thin lipped grey mullet, Transparent goby
Thamesmead Thames Middle 13 3-spined stickleback, Common bream, Common goby, Dace, European eel, Flounder, Roach, Sand goby, Sand smelt, Sea bass, Smelt, Sprat, Thin lipped grey mullet
Chelsea Creek Thames Upper 12 Common bream, Common carp varieties, Common goby, Dace, European eel, Flounder, Perch, Roach, Sand smelt, Sea bass, Smelt, Thin lipped grey mullet
Putney Thames Upper 11 3-spined stickleback, Brown / sea trout, Common goby, Dace, European eel, Flounder, Perch, Roach, Sand goby, Sand smelt, Sea bass
Vauxhall Thames Middle 11 3-spined stickleback, Atlantic salmon, Common goby, Dace, European eel, Flounder, Perch, Roach, Sand goby, Sand smelt, Sea bass
Teddington Weir Thames Upper 10 3-spined stickleback, Bleak, Common bream, Common goby, Dace, Flounder, Gudgeon, Perch, Roach, Sea bass
Chelsea Thames Upper 8 3-spined stickleback, Common bream, Dace, Flounder, Roach, Sand goby, Sand smelt, Sea bass
Mucking Thames Middle 8 European eels > elvers, Flounder, Herring, Sand smelt, Sea bass, Smelt, Thick lipped grey mullet, Thin lipped grey mullet
Fulham Thames Upper 5 3-spined stickleback, Dace, European eel, Flounder, Roach
Isleworth Thames Upper 5 3-spined stickleback, Dace, Gudgeon, Perch, Roach
Petersham Thames Upper 4 3-spined stickleback, Dace, Perch, Roach

Nursery score

The nursery score is a composite metric designed to evaluate the importance of sites as nursery habitats for juvenile fish. It integrates three key components: the mean length of all fish caught at a site, the mean length of juveniles, and the total number of juvenile individuals observed.

First, juvenile fish were identified for each species using a species-specific threshold, defined as the 25th percentile of lengths for that species across all sites. Then, for each site, the average total fish length, average juvenile length, and juvenile abundance were calculated. These three variables were rescaled to a 0–1 range (with lower mean lengths and higher juvenile counts contributing to higher scores), and the final nursery score was computed as the average of the three rescaled values.

This score highlights sites where juvenile fish are both abundant and relatively small.

site_length_stats <- lengths_thames_filtered %>%
  group_by(SITE_PARENT_NAME, TOP_TIER_SITE) %>%
  summarise(
    mean_length = mean(FISH_LENGTH, na.rm = TRUE),
    n_fish = n(),
    .groups = "drop")

lengths_tagged <- lengths_thames_filtered %>%
  group_by(SPECIES_NAME) %>%
  mutate(juvenile_threshold = quantile(FISH_LENGTH, 0.25, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(is_juvenile = FISH_LENGTH <= juvenile_threshold)

juvenile_site_stats <- lengths_tagged %>%
  filter(is_juvenile) %>%
  group_by(SITE_PARENT_NAME, TOP_TIER_SITE) %>%
  summarise(
    mean_juvenile_length = mean(FISH_LENGTH, na.rm = TRUE),
    n_juveniles = n(),
    .groups = "drop")

nursery_combined <- site_length_stats %>%
  inner_join(juvenile_site_stats, by = c("SITE_PARENT_NAME", "TOP_TIER_SITE")) %>%
  mutate(
    score_length = rescale(-mean_length, to = c(0, 1)),
    score_juvenile_size = rescale(-mean_juvenile_length, to = c(0, 1)),
    score_juvenile_count = rescale(n_juveniles, to = c(0, 1)),
    nursery_score = round((score_length + score_juvenile_size + score_juvenile_count) / 3, 3)) %>%
  arrange(desc(nursery_score))

sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")

sites_sf <- sites %>%
  filter(!is.na(SITE_RANKED_EASTING), !is.na(SITE_RANKED_NORTHING)) %>%
  st_as_sf(coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326) %>%
  mutate(
    lon = st_coordinates(.)[, 1],
    lat = st_coordinates(.)[, 2]
  ) %>%
  select(SITE_PARENT_NAME, lat, lon)

nursery_map <- nursery_combined %>%
  left_join(sites_sf, by = "SITE_PARENT_NAME") %>%
  filter(!is.na(lat), !is.na(lon))

leaflet(nursery_map) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    radius = ~rescale(nursery_score, to = c(5, 12)),  
    fillColor = ~colorNumeric("YlOrRd", domain = nursery_map$nursery_score)(nursery_score),
    fillOpacity = 0.95,
    color = "black", stroke = TRUE, weight = 0.8,
    popup = ~paste0(
      "<b>Site:</b> ", SITE_PARENT_NAME, "<br>",
      "<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
      "<b><span style='color:darkred'>Nursery Score:</span></b> ", nursery_score, "<br>",
      "<b>Mean Length:</b> ", round(mean_length, 1), " mm<br>",
      "<b>Juvenile Length:</b> ", round(mean_juvenile_length, 1), " mm<br>",
      "<b>Juvenile Count:</b> ", n_juveniles
    )
  ) %>%
  addLegend(
    "bottomright",
    pal = colorNumeric("YlOrRd", domain = nursery_map$nursery_score),
    values = nursery_map$nursery_score,
    title = "Nursery Score (0–1)",
    opacity = 1
  )
nursery_combined %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE, mean_length, mean_juvenile_length, n_juveniles, nursery_score) %>%
  arrange(desc(nursery_score)) %>%
  gt() %>%
  tab_header(
    title = "Nursery Site Ranking",
    subtitle = "Based on fish length, juvenile size, and juvenile abundance"
  ) %>%
  fmt_number(
    columns = c(mean_length, mean_juvenile_length, nursery_score), decimals = 1
  ) %>%
  fmt_number(columns = n_juveniles, decimals = 0) %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE = "Estuary Zone",
    mean_length = "Mean Length (mm)",
    mean_juvenile_length = "Juvenile Length (mm)",
    n_juveniles = "Juvenile Count",
    nursery_score = "Nursery Score"
  ) %>%
  data_color(
    columns = nursery_score,
    colors = scales::col_numeric(palette = "YlOrRd", domain = NULL)
  ) %>%
  tab_options(
    table.font.size = "small",
    data_row.padding = px(5)
  )
Nursery Site Ranking
Based on fish length, juvenile size, and juvenile abundance
Site Estuary Zone Mean Length (mm) Juvenile Length (mm) Juvenile Count Nursery Score
Richmond Thames Upper 53.1 30.5 2,312 0.7
Putney Thames Upper 31.8 20.8 80 0.6
Fulham Thames Upper 26.9 22.3 14 0.6
Chiswick Thames Upper 46.9 30.4 1,098 0.6
Chelsea Thames Upper 33.9 26.1 95 0.6
Hammersmith Thames Upper 38.3 27.8 407 0.6
Battersea Thames Upper 50.6 29.5 926 0.5
Brentford Thames Upper 42.7 30.7 669 0.5
Greenwich Thames Middle 54.5 35.9 1,336 0.5
Thamesmead Thames Middle 41.4 29.0 159 0.5
Kew Thames Upper 59.0 35.1 1,152 0.5
Vauxhall Thames Middle 46.7 31.1 31 0.4
Teddington Weir Thames Upper 53.7 32.0 53 0.4
Greenhithe Thames Middle 51.1 37.5 133 0.3
West Thurrock Thames Middle 61.0 39.3 671 0.3
Denton Wharf Thames Middle 67.0 38.2 206 0.2
Stanford-le-Hope Beach Thames Lower 65.9 40.4 192 0.2
Chelsea Creek Thames Upper 62.4 46.8 211 0.2
Mucking Thames Middle 83.7 43.2 29 0.0

# Convert to sf object
sites_sf <- st_as_sf(sites, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
  st_transform(crs = 4326)

coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]

richness_trends <- site_results_clean %>%
  filter(Metric == "Species_Richness") %>%
  select(SITE_PARENT_NAME, Slope, Significance)

# Add direction category
richness_trends <- richness_trends %>%
  mutate(Richness_Trend = case_when(
    Significance != "" & Slope > 0 ~ "Increasing",
    Significance != "" & Slope < 0 ~ "Decreasing",
    TRUE ~ "No Trend"
  ))

juvenile_hotspots <- nursery_combined %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE, score_length, nursery_score, n_juveniles)

site_overlay_data <- sites_sf %>%
  left_join(richness_trends, by = "SITE_PARENT_NAME") %>%
  left_join(juvenile_hotspots, by = "SITE_PARENT_NAME")

# Diversity
diversity_summary <- diversity_site_year %>%
  group_by(SITE_PARENT_NAME) %>%
  summarise(
    Species_Richness = mean(Species_Richness, na.rm = TRUE),
    Shannon_Diversity = mean(Shannon_Diversity, na.rm = TRUE),
    Simpson_Diversity = mean(Simpson_Diversity, na.rm = TRUE),
    Margalef_Richness = mean(Margalef_Richness, na.rm = TRUE),
    Pielou_Evenness = mean(Pielou_Evenness, na.rm = TRUE),
    Total_Abundance = mean(Total_Abundance, na.rm = TRUE),
    .groups = "drop"
  )

# Combine
site_rankings_full <- site_overlay_data %>%
  st_drop_geometry() %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE.x, nursery_score, n_juveniles) %>%
  filter(!is.na(nursery_score)) %>%
  left_join(diversity_summary, by = "SITE_PARENT_NAME") %>%
  mutate(
    nursery_score = round(nursery_score, 2),
    across(c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
             Margalef_Richness, Pielou_Evenness, Total_Abundance),
           ~ round(.x, 2))
  )

# Rank
site_rankings_full %>%
  distinct(SITE_PARENT_NAME, .keep_all = TRUE) %>%
  mutate(
    TOP_TIER_SITE.x = factor(TOP_TIER_SITE.x,
                             levels = c("Thames Upper", "Thames Middle", "Thames Lower"))
  ) %>%
  arrange(TOP_TIER_SITE.x) %>%
  rename(
    `Nursery Score` = nursery_score,
    `Juvenile Abundance` = n_juveniles,
    `Total Abundance` = Total_Abundance
  ) %>%
  gt() %>%
  tab_header(title = "Site rankings by juvenile and diversity metrics") %>%
  cols_label(
    SITE_PARENT_NAME = "Site",
    TOP_TIER_SITE.x = "Zone",
    `Nursery Score` = "Nursery score",
    Species_Richness = "Richness",
    Shannon_Diversity = "Shannon",
    Simpson_Diversity = "Simpson",
    Margalef_Richness = "Margalef",
    Pielou_Evenness = "Evenness",
    `Juvenile Abundance` = "Juvenile abundance",
    `Total Abundance` = "Total abundance"
  ) %>%
  data_color(
    columns = vars(
      `Nursery Score`,
      Species_Richness,
      Shannon_Diversity,
      Simpson_Diversity,
      Margalef_Richness,
      Pielou_Evenness,
      `Juvenile Abundance`,
      `Total Abundance`
    ),
    colors = scales::col_numeric(palette = "Oranges", domain = NULL)
  )
Site rankings by juvenile and diversity metrics
Site Zone Nursery score Juvenile abundance Richness Shannon Simpson Margalef Evenness Total abundance
Battersea Thames Upper 0.55 926 8.35 1.98 0.83 1.49 0.96 213.26
Brentford Thames Upper 0.54 669 6.62 1.75 0.80 1.11 0.98 174.25
Chelsea Creek Thames Upper 0.15 211 7.50 1.86 0.82 1.34 0.99 322.00
Chelsea Thames Upper 0.57 95 8.00 2.02 0.86 1.39 0.97 152.00
Chiswick Thames Upper 0.58 1098 8.48 2.02 0.85 1.70 0.96 142.15
Fulham Thames Upper 0.65 14 5.00 1.61 0.80 1.23 1.00 26.00
Hammersmith Thames Upper 0.57 407 7.57 1.91 0.83 1.38 0.97 156.86
Kew Thames Upper 0.46 1152 8.67 2.03 0.85 1.46 0.96 303.63
Putney Thames Upper 0.65 80 7.00 1.80 0.81 1.47 0.98 68.00
Richmond Thames Upper 0.72 2312 8.97 2.05 0.85 1.32 0.96 611.71
Teddington Weir Thames Upper 0.37 53 5.67 1.70 0.81 1.05 0.99 115.00
Greenhithe Thames Middle 0.33 133 11.00 2.29 0.89 2.10 0.97 129.33
Greenwich Thames Middle 0.50 1336 10.14 2.21 0.88 1.89 0.96 177.93
Mucking Thames Middle 0.05 29 4.33 1.43 0.75 0.83 0.98 62.00
Thamesmead Thames Middle 0.50 159 10.00 2.25 0.89 1.76 0.98 165.00
Vauxhall Thames Middle 0.42 31 7.50 1.91 0.83 1.77 0.96 38.50
West Thurrock Thames Middle 0.32 671 9.54 2.13 0.87 1.57 0.96 284.58
Denton Wharf Thames Middle 0.23 206 8.90 2.07 0.86 1.60 0.96 214.50
Stanford-le-Hope Beach Thames Lower 0.21 192 7.38 1.89 0.83 1.35 0.95 288.75

Which sites are ecologically rich and function well as nurseries?

The composite score represents an integrated index that ranks sites based on a combination of ecological and nursery-related metrics. It is calculated by first selecting a suite of indicators—namely nursery score, species richness, Shannon and Simpson diversity indices, Margalef richness, Pielou evenness, juvenile abundance, total fish abundance, and the number of widespread species observed per site.

To ensure comparability across these differently scaled variables, each metric is rescaled to a 0–1 range. The composite score is then derived by averaging these rescaled values, resulting in a balanced metric where higher scores indicate sites that perform well both in terms of biodiversity and nursery function.

top_common_species <- counts_thames_filtered %>%
  filter(!is.na(SPECIES_NAME), !is.na(SITE_PARENT_NAME)) %>%
  count(SPECIES_NAME, SITE_PARENT_NAME) %>%
  group_by(SPECIES_NAME) %>%
  summarise(n_sites = n_distinct(SITE_PARENT_NAME), .groups = "drop") %>%
  arrange(desc(n_sites)) %>%
  pull(SPECIES_NAME)

site_species_list <- counts_thames_filtered %>%
  filter(SPECIES_NAME %in% top_common_species) %>%
  distinct(SITE_PARENT_NAME, SPECIES_NAME) %>%
  group_by(SITE_PARENT_NAME) %>%
  summarise(
    species_list = paste(sort(unique(SPECIES_NAME)), collapse = ", "),
    n_species = n(),
    .groups = "drop")

site_rankings_with_species <- site_rankings_full %>%
  left_join(
    site_species_list %>% select(SITE_PARENT_NAME, n_species),
    by = "SITE_PARENT_NAME"
  )

site_rankings_scaled <- site_rankings_with_species %>%
  mutate(across(
    c(nursery_score, Species_Richness, Shannon_Diversity, Simpson_Diversity,
      Margalef_Richness, Pielou_Evenness, n_juveniles, Total_Abundance, n_species),
    ~ rescale(.x, to = c(0, 1), na.rm = TRUE),
    .names = "scaled_{.col}"
  )) %>%
  rowwise() %>%
  mutate(
    composite_score = mean(c_across(starts_with("scaled_")), na.rm = TRUE)
  ) %>%
  ungroup()

top_3_sites <- site_rankings_scaled %>%
  group_by(TOP_TIER_SITE.x) %>%
  slice_max(composite_score, n = 7, with_ties = FALSE) %>%
  arrange(TOP_TIER_SITE.x, desc(composite_score)) %>%
  ungroup()

top_3_sites %>%
  select(SITE_PARENT_NAME, TOP_TIER_SITE.x, composite_score) %>%
  rename(
    Site = SITE_PARENT_NAME,
    Zone = TOP_TIER_SITE.x,
    `Composite Score` = composite_score
  ) %>%
  distinct(Site, .keep_all = TRUE) %>%
  arrange(desc(`Composite Score`)) %>%
  mutate(Rank = row_number()) %>%
  select(Rank, everything()) %>%
  gt() %>%
  tab_header(title = "Top 3 Sites per Estuary Zone (Ranked by Composite Score)") %>%
  fmt_number(columns = `Composite Score`, decimals = 2) %>%
  data_color(
    columns = `Composite Score`,
    colors = scales::col_numeric(palette = "Oranges", domain = NULL)
  ) %>%
  tab_options(table.font.size = "small", data_row.padding = px(4))
Top 3 Sites per Estuary Zone (Ranked by Composite Score)
Rank Site Zone Composite Score
1 Richmond Thames Upper 0.74
2 Greenwich Thames Middle 0.68
3 Greenhithe Thames Middle 0.61
4 Thamesmead Thames Middle 0.60
5 Kew Thames Upper 0.58
6 Chiswick Thames Upper 0.57
7 Stanford-le-Hope Beach Thames Lower 0.35

Sampling sites

Sites overview ArcGIS map

setwd("C:/Users/bodna/OneDrive - University College London/Documents/PhD/PhD docs")

site_data <- read_excel("locations.xlsx")

valid_types <- c("Natural", "Degraded", "Restored", "Engineered")
site_data <- site_data %>%
  filter(Type %in% valid_types, !is.na(Lat), !is.na(Long))

m <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron)

for (site_type in valid_types) {
  type_data <- site_data %>% filter(Type == site_type)
  color <- case_when(
    site_type == "Natural" ~ "green",
    site_type == "Degraded" ~ "red",
    site_type == "Restored" ~ "orange",
    site_type == "Engineered" ~ "blue"
  )
  
  m <- m %>%
    addCircleMarkers(
      data = type_data,
      lng = ~Long, lat = ~Lat,
      group = site_type,
      radius = 6,
      color = color,
      fillOpacity = 0.8,
      label = ~Site,
      popup = ~paste0(
        "<b>", Site, "</b><br>",
        "Habitat: ", Habitat, "<br>",
        "Type: ", Type, "<br>",
        "Salinity: ", Salinity, "<br>",
        "Access: ", Access, "<br>",
        "Oversight: ", Oversight
      )
    )
}

m <- m %>%
  addLayersControl(
    overlayGroups = valid_types,
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("green", "red", "orange", "blue"),
    labels = valid_types,
    title = "Site type")

m

Suggested sampling sites

habitats <- data.frame(
  Status = c("Natural", "Degraded", "Restored/Engineered"),
  Fresh = c("Syon Park", "Richmond", "Battersea Reach Terracing"),
  Brackish = c("N/A", "Bank", "Point Wharf / Greenwich MT"),
  Seagrass = c("Seasalter", "", "Copperhouse"),
  Saltmarsh = c("St Mary's Bay Beach", "Grain", "Canvey / Salt Fleet Flats"),
  Oyster = c("Sheppey North", "", "Sheppey West"),
  check.names = FALSE)

kable(habitats, align = "lcccccc", caption = "") %>%
  add_header_above(c(" " = 1, " " = 1, " " = 1, " " = 1, " " = 1, " " = 1),
                   bold = FALSE, escape = FALSE) %>%
  add_header_above(c(" " = 1, " " = 1, " " = 1, "Marine" = 3)) %>%
  kable_styling(full_width = FALSE, position = "left") %>%
  column_spec(1, bold = TRUE)
Marine
Status Fresh Brackish Seagrass Saltmarsh Oyster
Natural Syon Park N/A Seasalter St Mary’s Bay Beach Sheppey North
Degraded Richmond Bank Grain
Restored/Engineered Battersea Reach Terracing Point Wharf / Greenwich MT Copperhouse Canvey / Salt Fleet Flats Sheppey West
# Load 
setwd("C:/Users/bodna/OneDrive - University College London/Documents/PhD/PhD docs")
site_data <- read_excel("locations.xlsx")

# Filter
valid_types <- c("Natural", "Degraded", "Restored", "Engineered")

site_data <- site_data %>%
  filter(Type %in% valid_types, !is.na(Lat), !is.na(Long))

# Suggested sites
suggested_sites <- c(
  "Syon Park", "Richmond", "Battersea Reach",
  "Bankside", "Point Wharf", "Greenwich MT",
  "Seasalter", "Copperhouse",
  "St Mary's beach", "Grain", "Canvey", "Salt Fleet Flats",
  "Sheppey N", "Sheppey W"
) %>%
  str_trim() %>%
  unique()

# Filter 
suggested_site_data <- site_data %>%
  filter(Site %in% suggested_sites)

# Create map
m <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron)

# iterate
for (site_type in valid_types) {
  type_data <- suggested_site_data %>% filter(Type == site_type)
  color <- case_when(
    site_type == "Natural" ~ "green",
    site_type == "Degraded" ~ "red",
    site_type == "Restored" ~ "orange",
    site_type == "Engineered" ~ "blue"
  )
  
  m <- m %>%
    addCircleMarkers(
      data = type_data,
      lng = ~Long, lat = ~Lat,
      group = site_type,
      radius = 6,
      color = color,
      fillOpacity = 0.85,
      label = ~Site,
      popup = ~paste0(
        "<b>", Site, "</b><br>",
        "Habitat: ", Habitat, "<br>",
        "Type: ", Type, "<br>",
        "Salinity: ", Salinity, "<br>",
        "Access: ", Access, "<br>",
        "Oversight: ", Oversight
      )
    )
}

m <- m %>%
  addLayersControl(
    overlayGroups = valid_types,
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("green", "red", "orange", "blue"),
    labels = valid_types,
    title = "Site type")

m